# -*- coding=utf-8 -*-
# @Time : 2020-01-02
# @Author : rubick

import requests
from lxml import etree
import json

# Request headers passed to requests.get() by the crawler below; the
# User-Agent identifies the client as desktop Safari on macOS.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) "
        "Version/12.1.1 Safari/605.1.15"
    ),
}

def crawl_xici(pages=1, timeout=10):
    """
    Yield proxy URLs scraped from Xici Proxy (https://www.xicidaili.com).

    For each page number from 1 to ``pages`` the "wt/<page>" listing is
    fetched and its entries are yielded as ``http://ip:port``, then the
    "wn/<page>" listing is fetched and its entries are yielded as
    ``https://ip:port``.

    This is a generator: no request is sent until it is iterated.

    Args:
        pages: Number of listing pages to fetch per proxy type
            (default 1, matching the previously hard-coded behavior).
        timeout: Seconds passed to ``requests.get`` as ``timeout`` so a
            stalled connection cannot block forever.

    Yields:
        str: A proxy URL of the form ``scheme://ip:port``.

    Raises:
        requests.RequestException: Propagated from ``requests.get``
            (including timeouts) when a page cannot be fetched.
    """
    url = "https://www.xicidaili.com/{}"
    items = []
    for page in range(1, pages + 1):
        items.append(("wt/{}".format(page), "http://{}:{}"))
        items.append(("wn/{}".format(page), "https://{}:{}"))

    for proxy_type, host in items:
        page_url = url.format(proxy_type)
        print(page_url)
        response = requests.get(page_url, headers=headers, timeout=timeout)
        dom = etree.HTML(response.text)
        if dom is None:
            # NOTE(review): etree.HTML may return None when the body has no
            # parseable markup; skip the page instead of failing on .xpath.
            continue

        # Read the IP and port from the same <tr> so a row with a missing
        # cell cannot shift the pairing of every row after it.
        for row in dom.xpath("//table/tr[@class='odd']"):
            ip_texts = row.xpath("td[2]/text()")
            port_texts = row.xpath("td[3]/text()")
            if not ip_texts or not port_texts:
                continue
            # Strip surrounding whitespace so it never ends up in the URL.
            ip = ip_texts[0].strip()
            port = port_texts[0].strip()
            if ip and port:
                yield host.format(ip, port)


# crawl_xici is a generator function: calling it without iterating only
# creates a generator object and fetches nothing. Consume it so the crawl
# actually runs, and only when this file is executed as a script so that
# importing the module stays free of network side effects.
if __name__ == "__main__":
    for proxy in crawl_xici():
        print(proxy)